Source codes: https://github.com/Anne-LiseGerard/dftd_rnaseq

suppressPackageStartupMessages({
  library("VennDiagram")
  library("ggplot2")
  library("mitch")
  library("dplyr")
  library("kableExtra")
  library("limma")
  library("usefun")
  library("viridis")
  library("ggh4x")
  library("tidyr")

})

knitr::opts_chunk$set(dev = 'svg')

Background

Goal: compare expression profiles of DFT1 vs DFT2 tumour biopsies and cell lines.Two contrasts: DFT1 vs DFT2, then compare that first contrast in cell lines vs biopsies.

Functions

display_venn <- function(x, ...){
  library(VennDiagram)
  grid.newpage()
  venn_object <- venn.diagram(x, filename = NULL, ...,
                              height = 480 , width = 480 , 
                              resolution = 300,
                              lwd = 2,
                              col=c("#B3B9DF", "#DDAFB7"),
                              fill = c(alpha("#B3B9DF",0.3), alpha("#DDAFB7",0.3)),
                              cex = 1,
                              fontfamily = "sans",
                              cat.cex = 0.9,
                              cat.default.pos = "outer",
                              cat.dist = c(0.05, 0.05),
                              cat.pos = c(-25,25),
                              cat.fontfamily = "sans",
                              cat.col = c("#B3B9DF", "#DDAFB7"),
                              main.cex = 1,
                              main.fontfamily = "sans",
                              sub.cex = 0.8,
                              sub.fontfamily = "sans",
                              ext.text=FALSE)
  grid.draw(venn_object)
}

# get rid of log files
futile.logger::flog.threshold(futile.logger::ERROR, name = "VennDiagramLogger")
## NULL

Load data

Import lists of genes for Venn diagrams , DESEq2 outputs and genesets.

clines <- readRDS("~/dftd_RNAseq_annelise/dge/venn_clines.rds") # list genes cell lines
biopsies <- readRDS("~/dftd_RNAseq_annelise/dge/venn_biopsies.rds") # list genes biopsies

dge_clines <- readRDS("~/dftd_RNAseq_annelise/dge/dge_clines.rds") # DESeq2 cell lines
dge_biopsies <- readRDS("~/dftd_RNAseq_annelise/dge/dge_biopsies.rds") # DESeq2 biopsies

gt <- readRDS("~/dftd_RNAseq_annelise/dge/gt.rds") # homology
genesets <- gmt_import("~/dftd_RNAseq_annelise/ref/ReactomePathways_2023-07-14.gmt") # reactome
genesets2 <- gmt_import("~/dftd_RNAseq_annelise/ref/c2.cp.kegg.v2023.1.Hs.symbols.gmt.txt") # kegg

# MDS data
y_clines <- readRDS("~/dftd_RNAseq_annelise/dge/y_clines.rds")
y_biopsies <- readRDS("~/dftd_RNAseq_annelise/dge/y_biopsies.rds")

MDS

#MDS=PCoA

ss_clines <- read.table("/mnt/data/annelise/dftd_RNAseq_annelise/ss.tsv",sep="\t",fill=TRUE,header=TRUE)

ss_biopsies <- read.table("/mnt/data/annelise/dftd_RNAseq_annelise/ss_patchett.txt",sep="\t",fill=TRUE,header=TRUE)

ss <- data.frame(run=c(ss_clines$C, ss_biopsies$run_accession),
                 ID=c(ss_clines$ClientID, ss_biopsies$sample_id),
                 DFT=c(ss_clines$DFT, ss_biopsies$DFT),
                 replicate=(c(ss_clines$Replicate, c(1,1,2,2,1,1,2,2,2,2,1,1,1,2))),
                 sample_type=c(rep("cell_line",19), ss_biopsies$sample_type))

ss <- na.omit(ss)

ss$DFT <- as.factor(ss$DFT)

ss %>%
  kbl(caption="Sample sheet for all samples") %>%
  kable_paper("hover", full_width = F)
Sample sheet for all samples
run ID DFT replicate sample_type
1 6180766 4906_1-1 DFT1 1 cell_line
2 6180767 C5065_1-1 DFT1 1 cell_line
3 6180768 1426_1-1 DFT1 1 cell_line
4 6180769 RV_1-1 DFT2 1 cell_line
5 6180770 SN_1-1 DFT2 1 cell_line
6 6180771 TD549_1-1 DFT2 1 cell_line
7 6180772 4906_2-1 DFT1 2 cell_line
8 6180773 C5065_2-1 DFT1 2 cell_line
9 6180774 1426_2-1 DFT1 2 cell_line
10 6180775 RV_2-1 DFT2 2 cell_line
11 6180776 SN_2-1 DFT2 2 cell_line
12 6180777 TD549_2-1 DFT2 2 cell_line
13 6180778 4906_3-1 DFT1 3 cell_line
14 6180779 C5065_3-1 DFT1 3 cell_line
15 6180780 1426_3-1 DFT1 3 cell_line
16 6180781 RV_3-1 DFT2 3 cell_line
17 6180782 SN_3-1 DFT2 3 cell_line
18 6180783 TD549_3-1 DFT2 3 cell_line
20 ERR2804403 sha_DFT1_1-1 DFT1 1 biopsy
21 ERR2804404 sha_DFT1_1-2 DFT1 1 biopsy
22 ERR2804405 sha_DFT1_2-1 DFT1 2 biopsy
23 ERR2804406 sha_DFT1_2-2 DFT1 2 biopsy
24 ERR2804407 sha_DFT2_1-1 DFT2 1 biopsy
25 ERR2804408 sha_DFT2_1-2 DFT2 1 biopsy
26 ERR2804409 sha_DFT2_2-1 DFT2 2 biopsy
27 ERR2804410 sha_DFT2_2-2 DFT2 2 biopsy
28 ERR2852729 sha_DFT2_RV_2-2 DFT2 2 cell_line
29 ERR2852728 sha_DFT2_RV_2-1 DFT2 2 cell_line
30 ERR2852727 sha_DFT2_RV_1-2 DFT2 1 cell_line
31 ERR2852726 sha_DFT2_RV_1-1 DFT2 1 cell_line
32 SRR6266557 C5065_UT_01 DFT1 1 cell_line
33 SRR6266556 C5065_UT_02 DFT1 2 cell_line
y <- cbind(y_clines, y_biopsies)
y <- y[,-c(27:32)] # remove Patchett cell lines

cs <- colSums(y)
cs <- cs[order(cs)]

par(mar=c(5,10,5,2))

barplot(cs,main="All samples",horiz=TRUE,las=1)

cols <- ss$DFT
cols <- gsub("DFT1","#B3B9DF",cols)
cols <- gsub("DFT2","#DDAFB7",cols)

pchs <- ss$sample_type
pchs <- gsub("cell_line",19,pchs)
pchs <- gsub("biopsy",17,pchs)

mymds <- plotMDS(y,plot=FALSE)

# fix the xlims
XMIN=min(mymds$x)
XMAX=max(mymds$x)
XMID=(XMAX+XMIN)/2
XMIN <- XMID + (XMIN-XMID)*1.1
XMAX <- XMID+(XMAX-XMID)*1.1

par(mar = c(5.1, 4.1, 4.1, 2.1) )
plotMDS(mymds,pch=as.numeric(pchs),cex=3,col=cols,main="Cell lines and Tumour biopsies",xlim=c(XMIN,XMAX))
text(mymds,labels=colnames(y))


mtext("blue=DFT1,pink=DFT2")

Visualisation

Some Venn diagrams to compare gene expression.

display_venn(x=list(clines[["allgenes"]], biopsies[["allgenes"]]),
             category.names = c("Cell lines" , "Biopsies"),
             main="All genes")

display_venn(x=list(clines[["sigdegs"]], biopsies[["sigdegs"]]),
             category.names = c("Cell lines" , "Biopsies"),
             main="DEGs",
             sub="(padj < 0.05)")

display_venn(x=list(clines[["updegs"]], biopsies[["updegs"]]),
             category.names = c("Cell lines" , "Biopsies"),
             main="Up DEGs",
             sub="(padj < 0.05; logFC > 0)")

display_venn(x=list(clines[["dndegs"]], biopsies[["dndegs"]]),
             category.names = c("Cell lines" , "Biopsies"),
             main="Down DEGs",
             sub="(padj < 0.05; logFC < 0)")

# genes related to metabolism
display_venn(x=list(clines[["metreactgenes"]], biopsies[["metreactgenes"]]),
             category.names = c("Cell lines" , "Biopsies"),
             main="Metabolism genes - REACTOME")

display_venn(x=list(clines[["metsigreactdegs"]], biopsies[["metsigreactdegs"]]),
             category.names = c("Cell lines" , "Biopsies"),
             main="Metabolism DEGs - REACTOME",
             sub="(padj < 0.05)")

display_venn(x=list(clines[["metsigreactupdegs"]], biopsies[["metsigreactupdegs"]]),
             category.names = c("Cell lines" , "Biopsies"),
             main="Metabolism up DEGs - REACTOME",
             sub="(padj < 0.05; logFC > 0)")

display_venn(x=list(clines[["metsigreactdndegs"]], biopsies[["metsigreactdndegs"]]),
             category.names = c("Cell lines" , "Biopsies"),
             main="Metabolism down DEGs - REACTOME",
             sub="(padj < 0.05; logFC < 0)")

Enrichment

Here we will perform a multi-contrast enrichment analysis (DFT1 vs DFT2 AND biopsies vs cell lines).

gsea_clines <- readRDS("~/dftd_RNAseq_annelise/dge/gsea_clines.rds")
gsea_biopsies <- readRDS("~/dftd_RNAseq_annelise/dge/gsea_biopsies.rds")

gsea_common <- intersect(gsea_clines$set,gsea_biopsies$set)

gsea_clines_only <- outersect(gsea_clines$set, gsea_common)
gsea_biopsies_only <- outersect(gsea_biopsies$set, gsea_common)

gsea_clines$dataset <- "Cell lines"
gsea_biopsies$dataset <- "Biopsies"
gsea_all <- rbind(gsea_clines,gsea_biopsies)

gsea_common_df <- filter(gsea_all, set %in% gsea_common)

ggplot(gsea_common_df, aes(x = s.dist, y = reorder(set, s.dist), color = p.adjustANOVA, size = setSize, shape=dataset)) + 
  geom_point(stat = 'identity') + 
  scale_color_viridis(option="mako", direction=1,limits = c(0, 0.05)) +
  xlab("s dist") + ylab("pathway") + ggtitle("Common") + 
  theme_bw()+
  theme(legend.position = c(-1.8,0.8))
## Warning: A numeric `legend.position` argument in `theme()` was deprecated in ggplot2
## 3.5.0.
## ℹ Please use the `legend.position.inside` argument of `theme()` instead.
## This warning is displayed once every 8 hours.
## Call `lifecycle::last_lifecycle_warnings()` to see where this warning was
## generated.

gsea_clines_m <- readRDS("~/dftd_RNAseq_annelise/dge/gsea_clines_metabo.rds")
gsea_biopsies_m <- readRDS("~/dftd_RNAseq_annelise/dge/gsea_biopsies_metabo.rds")

gsea_clines_m$dataset <- "Cell lines"
gsea_biopsies_m$dataset <- "Biopsies"

gsea_all_m <- rbind(gsea_clines_m,gsea_biopsies_m)
gsea_all_m$dataset <- factor(gsea_all_m$dataset, levels=c("Cell lines","Biopsies"))

ggplot(gsea_all_m, aes(x = s.dist, y = reorder(set, s.dist), color = p.adjustANOVA, size = setSize)) + 
  facet_grid(~dataset,scales = "free") +
  geom_point(stat = 'identity') + 
  scale_color_viridis(option="mako", direction=1,limits = c(0, 0.05)) +
  xlab("s dist") + ylab("pathway") + ggtitle("Metabolism") + 
  theme_bw()

gsea_common_df$plot <- "Common pathways"
gsea_all_m$plot <- "Metabolic pathways"

gsea_plots <- rbind(gsea_all_m,gsea_common_df)
gsea_plots$plot <- factor(gsea_plots$plot, levels=c("Metabolic pathways","Common pathways"))
rownames(dge_clines) <- gsub("\\..*","", rownames(dge_clines))

x <- list("Cell lines"=dge_clines, "Biopsies"=dge_biopsies)

y <- mitch_import(x, "deseq2", geneTable = gt)
## Note: Mean no. genes in input = 16699.5
## Note: no. genes in output = 12301
## Note: estimated proportion of input genes in output = 0.737
genesets <- readRDS(file = "~/dftd_RNAseq_annelise/ref/gsea/reactome_plus_custom.RDS")

# REACTOME
#The pathway 'Response to metal ions' causes a bug as the genenames appear to map to the same human gene.
#I think the duplicate data is causing problems.
setnames <-c( "Response to metal ions", "Metallothioneins bind metals")
genesets <- genesets[! names(genesets) %in% setnames]

res_react <- mitch_calc(y, genesets, resrows = 100)
## Note: When prioritising by significance (ie: small 
##             p-values), large effect sizes might be missed.
if ( !file.exists("mitch_comp_react.html") ) {
  mitch_report(res_react, "mitch_comp_react.html")
}

#mitch_plots(res_react, "mitch_comp_react.pdf")

gsea_signif <- filter(res_react$enrichment_result, p.adjustMANOVA < 0.05)

react_metabosets <- readRDS("~/dftd_RNAseq_annelise/ref/reactome_metabo.rds")

metabo_gsea <- filter(gsea_signif, set %in% names(react_metabosets))
metabo_gsea <- gather(metabo_gsea, "dataset","s.dist", 4:5)
metabo_gsea[metabo_gsea == "s.Cell.lines"] <- "Cell lines"
metabo_gsea[metabo_gsea == "s.Biopsies"] <- "Biopsies"
ggplot(metabo_gsea, aes(x = s.dist, y = reorder(set, s.dist), color = p.adjustMANOVA, size = setSize, shape=dataset)) + 
  #facet_wrap(~dataset, ncol=1, nrow=2, scales = "free") +
  geom_vline(xintercept = 0, colour="gray", linetype = "longdash")+
  scale_size_continuous(breaks=c(10, 50, 100, 500, 1000))+
  geom_point(stat = 'identity') + 
  scale_color_viridis(option="mako", direction=1,limits = c(0, 0.05)) +
  xlab("s dist") + ylab("pathway") + ggtitle("Metabolism") + 
  theme_bw()

gsea_upboth <- filter(gsea_signif, s.Cell.lines > 0 & s.Biopsies > 0)
gsea_upboth <- gather(gsea_upboth, "dataset","s.dist", 4:5)
gsea_upboth[gsea_upboth == "s.Cell.lines"] <- "Cell lines"
gsea_upboth[gsea_upboth == "s.Biopsies"] <- "Biopsies"

gsea_downboth <- filter(gsea_signif, s.Cell.lines < 0 & s.Biopsies < 0)
gsea_downboth <- gather(gsea_downboth, "dataset","s.dist", 4:5)
gsea_downboth[gsea_downboth == "s.Cell.lines"] <- "Cell lines"
gsea_downboth[gsea_downboth == "s.Biopsies"] <- "Biopsies"

gsea_upcelllines_downbiopsies <- filter(gsea_signif, s.Cell.lines > 0 & s.Biopsies < 0)
gsea_upcelllines_downbiopsies <- gather(gsea_upcelllines_downbiopsies, "dataset","s.dist", 4:5)
gsea_upcelllines_downbiopsies[gsea_upcelllines_downbiopsies == "s.Cell.lines"] <- "Cell lines"
gsea_upcelllines_downbiopsies[gsea_upcelllines_downbiopsies == "s.Biopsies"] <- "Biopsies"

gsea_upbiopsies_downcelllines <- filter(gsea_signif, s.Cell.lines < 0 & s.Biopsies > 0)
gsea_upbiopsies_downcelllines <- gather(gsea_upbiopsies_downcelllines, "dataset","s.dist", 4:5)
gsea_upbiopsies_downcelllines[gsea_upbiopsies_downcelllines == "s.Cell.lines"] <- "Cell lines"
gsea_upbiopsies_downcelllines[gsea_upbiopsies_downcelllines == "s.Biopsies"] <- "Biopsies"

ggplot(gsea_upboth, aes(x = s.dist, y = reorder(set, s.dist), color = p.adjustMANOVA, size = setSize, shape=dataset)) + 
  #facet_wrap(~dataset, ncol=1, nrow=2, scales = "free") +
  scale_size_continuous(breaks=c(10, 50, 100, 500, 1000))+
  geom_vline(xintercept = 0, colour="gray", linetype = "longdash")+
  geom_point(stat = 'identity') + 
  scale_color_viridis(option="mako", direction=1,limits = c(0, 0.05)) +
  xlab("s dist") + ylab("pathway") + ggtitle("Concordant upregulation") + 
  theme_bw()

ggplot(gsea_downboth, aes(x = s.dist, y = reorder(set, s.dist), color = p.adjustMANOVA, size = setSize, shape=dataset)) + 
  #facet_wrap(~dataset, ncol=1, nrow=2, scales = "free") +
  geom_vline(xintercept = 0, colour="gray", linetype = "longdash")+
  scale_size_continuous(breaks=c(10, 50, 100, 500, 1000))+
  geom_point(stat = 'identity') + 
  scale_color_viridis(option="mako", direction=1,limits = c(0, 0.05)) +
  xlab("s dist") + ylab("pathway") + ggtitle("Concordant downregulation") + 
  theme_bw()

ggplot(gsea_upcelllines_downbiopsies, aes(x = s.dist, y = reorder(set, s.dist), color = p.adjustMANOVA, size = setSize, shape=dataset)) + 
  #facet_wrap(~dataset, ncol=1, nrow=2, scales = "free") +
  geom_vline(xintercept = 0, colour="gray", linetype = "longdash")+
  geom_point(stat = 'identity') + 
  scale_color_viridis(option="mako", direction=1,limits = c(0, 0.05)) +
  scale_size_continuous(breaks=c(10, 50, 100, 500, 1000))+
  xlab("s dist") + ylab("pathway") + ggtitle("Upregulation in cell lines \nDown regulation in biopsies") + 
  theme_bw()

ggplot(gsea_upbiopsies_downcelllines, aes(x = s.dist, y = reorder(set, s.dist), color = p.adjustMANOVA, size = setSize, shape=dataset)) + 
  #facet_wrap(~dataset, ncol=1, nrow=2, scales = "free") +
  geom_vline(xintercept = 0, colour="gray", linetype = "longdash")+
  geom_point(stat = 'identity') + 
  scale_color_viridis(option="mako", direction=1,limits = c(0, 0.05)) +
  scale_size_continuous(breaks=c(10, 50, 100, 500, 1000))+
  xlab("s dist") + ylab("pathway") + ggtitle("Downregulation in cell lines \nUp regulation in biopsies") + 
  theme_bw()

gsea_upboth$plot <- "Concordant positive enrichment"
gsea_upbiopsies_downcelllines$plot <- "Negative enrichment in cell lines \nPositive enrichment in biopsies"
gsea_upcelllines_downbiopsies$plot <- "Positive enrichment in cell lines \nNegative enrichment in biopsies"
gsea_downboth$plot <- "Concordant negative enrichment"
metabo_gsea$plot <- "Metabolism"
gsea_x <- rbind(gsea_upboth,gsea_upbiopsies_downcelllines, gsea_upcelllines_downbiopsies, gsea_downboth, metabo_gsea)

library(ggforce)

ggplot(gsea_x, aes(x = s.dist, y = reorder(set, s.dist), color = p.adjustMANOVA, size = setSize, shape=dataset)) + 
  facet_wrap(~plot, scales = "free", ncol=1, nrow=5) +
  scale_size_continuous(breaks=c(10, 50, 100, 500, 1000))+
  force_panelsizes(rows = c(3.5, 71.5, 11, 7, 2))+
  geom_vline(xintercept = 0, colour="gray", linetype = "longdash")+
  geom_point(stat = 'identity') + 
  scale_color_viridis(option="mako", direction=1,limits = c(0, 0.05)) +
  xlab("s dist") + ylab("") + ggtitle("") + 
  theme_bw()

Session information

sessionInfo()
## R version 4.4.2 (2024-10-31)
## Platform: x86_64-pc-linux-gnu
## Running under: Ubuntu 24.04.1 LTS
## 
## Matrix products: default
## BLAS:   /usr/lib/x86_64-linux-gnu/blas/libblas.so.3.12.0 
## LAPACK: /usr/lib/x86_64-linux-gnu/lapack/liblapack.so.3.12.0
## 
## locale:
##  [1] LC_CTYPE=en_AU.UTF-8       LC_NUMERIC=C              
##  [3] LC_TIME=en_AU.UTF-8        LC_COLLATE=en_AU.UTF-8    
##  [5] LC_MONETARY=en_AU.UTF-8    LC_MESSAGES=en_AU.UTF-8   
##  [7] LC_PAPER=en_AU.UTF-8       LC_NAME=C                 
##  [9] LC_ADDRESS=C               LC_TELEPHONE=C            
## [11] LC_MEASUREMENT=en_AU.UTF-8 LC_IDENTIFICATION=C       
## 
## time zone: Australia/Melbourne
## tzcode source: system (glibc)
## 
## attached base packages:
## [1] grid      stats     graphics  grDevices utils     datasets  methods  
## [8] base     
## 
## other attached packages:
##  [1] ggforce_0.4.2       tidyr_1.3.1         ggh4x_0.2.8        
##  [4] viridis_0.6.5       viridisLite_0.4.2   usefun_0.5.0       
##  [7] limma_3.60.4        kableExtra_1.4.0    dplyr_1.1.4        
## [10] mitch_1.16.0        ggplot2_3.5.1       VennDiagram_1.7.3  
## [13] futile.logger_1.4.3
## 
## loaded via a namespace (and not attached):
##  [1] gtable_0.3.5         beeswarm_0.4.0       xfun_0.49           
##  [4] bslib_0.8.0          caTools_1.18.3       htmlwidgets_1.6.4   
##  [7] GGally_2.2.1         bitops_1.0-8         vctrs_0.6.5         
## [10] tools_4.4.2          generics_0.1.3       parallel_4.4.2      
## [13] tibble_3.2.1         fansi_1.0.6          highr_0.11          
## [16] pkgconfig_2.0.3      KernSmooth_2.23-24   RColorBrewer_1.1-3  
## [19] lifecycle_1.0.4      farver_2.1.2         compiler_4.4.2      
## [22] stringr_1.5.1        gplots_3.1.3.1       statmod_1.5.0       
## [25] munsell_0.5.1        httpuv_1.6.15        PRROC_1.3.1         
## [28] htmltools_0.5.8.1    sass_0.4.9           yaml_2.3.10         
## [31] crayon_1.5.3         later_1.3.2          pillar_1.9.0        
## [34] jquerylib_0.1.4      MASS_7.3-61          cachem_1.1.0        
## [37] mime_0.12            ggstats_0.6.0        gtools_3.9.5        
## [40] tidyselect_1.2.1     digest_0.6.37        stringi_1.8.4       
## [43] reshape2_1.4.4       purrr_1.0.2          labeling_0.4.3      
## [46] polyclip_1.10-7      fastmap_1.2.0        colorspace_2.1-1    
## [49] cli_3.6.3            magrittr_2.0.3       utf8_1.2.4          
## [52] withr_3.0.1          scales_1.3.0         promises_1.3.0      
## [55] rmarkdown_2.28       lambda.r_1.2.4       gridExtra_2.3       
## [58] shiny_1.9.1          evaluate_0.24.0      knitr_1.48          
## [61] rlang_1.1.4          futile.options_1.0.1 Rcpp_1.0.13         
## [64] xtable_1.8-4         glue_1.7.0           tweenr_2.0.3        
## [67] echarts4r_0.4.5      formatR_1.14         xml2_1.3.6          
## [70] svglite_2.1.3        rstudioapi_0.16.0    jsonlite_1.8.8      
## [73] R6_2.5.1             plyr_1.8.9           systemfonts_1.1.0